{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "dying-modem",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import openpyxl\n",
    "from openpyxl import load_workbook\n",
    "from openpyxl.styles import Font\n",
    "from openpyxl.chart import BarChart, Reference\n",
    "import string"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "enormous-counter",
   "metadata": {},
   "source": [
    "# Make a Pivot Table with Pandas"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "manufactured-adolescent",
   "metadata": {},
   "source": [
    "## Read the Excel files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "substantial-jewel",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Gender</th>\n",
       "      <th>Product line</th>\n",
       "      <th>Total</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Female</td>\n",
       "      <td>Health and beauty</td>\n",
       "      <td>548.9715</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Female</td>\n",
       "      <td>Electronic accessories</td>\n",
       "      <td>80.2200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Male</td>\n",
       "      <td>Home and lifestyle</td>\n",
       "      <td>340.5255</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Male</td>\n",
       "      <td>Health and beauty</td>\n",
       "      <td>489.0480</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Male</td>\n",
       "      <td>Sports and travel</td>\n",
       "      <td>634.3785</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>995</th>\n",
       "      <td>Male</td>\n",
       "      <td>Health and beauty</td>\n",
       "      <td>42.3675</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>996</th>\n",
       "      <td>Female</td>\n",
       "      <td>Home and lifestyle</td>\n",
       "      <td>1022.4900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>997</th>\n",
       "      <td>Male</td>\n",
       "      <td>Food and beverages</td>\n",
       "      <td>33.4320</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>998</th>\n",
       "      <td>Male</td>\n",
       "      <td>Home and lifestyle</td>\n",
       "      <td>69.1110</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>999</th>\n",
       "      <td>Female</td>\n",
       "      <td>Fashion accessories</td>\n",
       "      <td>649.2990</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1000 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     Gender            Product line      Total\n",
       "0    Female       Health and beauty   548.9715\n",
       "1    Female  Electronic accessories    80.2200\n",
       "2      Male      Home and lifestyle   340.5255\n",
       "3      Male       Health and beauty   489.0480\n",
       "4      Male       Sports and travel   634.3785\n",
       "..      ...                     ...        ...\n",
       "995    Male       Health and beauty    42.3675\n",
       "996  Female      Home and lifestyle  1022.4900\n",
       "997    Male      Food and beverages    33.4320\n",
       "998    Male      Home and lifestyle    69.1110\n",
       "999  Female     Fashion accessories   649.2990\n",
       "\n",
       "[1000 rows x 3 columns]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "excel_file = pd.read_excel('supermarket_sales.xlsx')\n",
    "excel_file[['Gender', 'Product line', 'Total']]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "acute-terminology",
   "metadata": {},
   "source": [
    "## Make a pivot table"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "iraqi-palmer",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>Product line</th>\n",
       "      <th>Electronic accessories</th>\n",
       "      <th>Fashion accessories</th>\n",
       "      <th>Food and beverages</th>\n",
       "      <th>Health and beauty</th>\n",
       "      <th>Home and lifestyle</th>\n",
       "      <th>Sports and travel</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Gender</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Female</th>\n",
       "      <td>27102.0</td>\n",
       "      <td>30437.0</td>\n",
       "      <td>33171.0</td>\n",
       "      <td>18561.0</td>\n",
       "      <td>30037.0</td>\n",
       "      <td>28575.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Male</th>\n",
       "      <td>27236.0</td>\n",
       "      <td>23868.0</td>\n",
       "      <td>22974.0</td>\n",
       "      <td>30633.0</td>\n",
       "      <td>23825.0</td>\n",
       "      <td>26548.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Product line  Electronic accessories  Fashion accessories  Food and beverages  \\\n",
       "Gender                                                                          \n",
       "Female                       27102.0              30437.0             33171.0   \n",
       "Male                         27236.0              23868.0             22974.0   \n",
       "\n",
       "Product line  Health and beauty  Home and lifestyle  Sports and travel  \n",
       "Gender                                                                  \n",
       "Female                  18561.0             30037.0            28575.0  \n",
       "Male                    30633.0             23825.0            26548.0  "
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "report_table = excel_file.pivot_table(index='Gender',\n",
    "                                      columns='Product line',\n",
    "                                      values='Total',\n",
    "                                      aggfunc='sum').round(0)\n",
    "report_table"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "printable-current",
   "metadata": {},
   "source": [
    "## Exporting pivot table to Excel file"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "suburban-mouth",
   "metadata": {},
   "outputs": [],
   "source": [
    "report_table.to_excel('report_2021.xlsx',\n",
    "                      sheet_name='Report',\n",
    "                      startrow=4)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "motivated-somerset",
   "metadata": {},
   "source": [
    "# Make The Report with Openpyxl"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "anonymous-assurance",
   "metadata": {},
   "source": [
    "## Creating row and column reference"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "eastern-click",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Min Columns: 1\n",
      "Max Columns: 7\n",
      "Min Rows: 5\n",
      "Max Rows: 7\n"
     ]
    }
   ],
   "source": [
    "wb = load_workbook('report_2021.xlsx')\n",
    "sheet = wb['Report']\n",
    "\n",
    "# cell references (original spreadsheet) \n",
    "min_column = wb.active.min_column\n",
    "max_column = wb.active.max_column\n",
    "min_row = wb.active.min_row\n",
    "max_row = wb.active.max_row\n",
    "\n",
    "print(f'Min Columns: {min_column}')\n",
    "print(f'Max Columns: {max_column}')\n",
    "print(f'Min Rows: {min_row}')\n",
    "print(f'Max Rows: {max_row}')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "korean-dimension",
   "metadata": {},
   "source": [
    "## Adding Excel charts through Python"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "incident-return",
   "metadata": {},
   "outputs": [],
   "source": [
    "wb = load_workbook('report_2021.xlsx')\n",
    "sheet = wb['Report']\n",
    "# barchart\n",
    "barchart = BarChart()\n",
    "#locate data and categories\n",
    "data = Reference(sheet,\n",
    "                 min_col=min_column+1,\n",
    "                 max_col=max_column,\n",
    "                 min_row=min_row,\n",
    "                 max_row=max_row) #including headers\n",
    "categories = Reference(sheet,\n",
    "                       min_col=min_column,\n",
    "                       max_col=min_column,\n",
    "                       min_row=min_row+1,\n",
    "                       max_row=max_row) #not including headers\n",
    "# adding data and categories\n",
    "barchart.add_data(data, titles_from_data=True)\n",
    "barchart.set_categories(categories)\n",
    "#location chart\n",
    "sheet.add_chart(barchart, \"B12\")\n",
    "barchart.title = 'Sales by Product line'\n",
    "barchart.style = 5 #choose the chart style\n",
    "wb.save('report_2021.xlsx')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "illegal-benjamin",
   "metadata": {},
   "source": [
    "## Applying Excel Formulas through Python"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "written-trout",
   "metadata": {},
   "source": [
    "### Applying formulas manually"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "cardiovascular-illness",
   "metadata": {},
   "outputs": [],
   "source": [
    "# wb = load_workbook('report_2021.xlsx')\n",
    "# sheet = wb['Report']\n",
    "\n",
    "# sheet['B7'] = '=SUM(B5:B6)'\n",
    "# sheet['B7'].style = 'Currency'\n",
    "\n",
    "# wb.save('report_2021.xlsx')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "accurate-round",
   "metadata": {},
   "source": [
    "### Applying formulas with cell references"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "electoral-macedonia",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['A', 'B', 'C', 'D', 'E', 'F', 'G']"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import string\n",
    "alphabet = list(string.ascii_uppercase)\n",
    "excel_alphabet = alphabet[0:max_column] #note: Python lists start on 0 -> A=0, B=1, C=2. #note2 the [a:b] takes b-a elements\n",
    "excel_alphabet"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "former-mention",
   "metadata": {},
   "outputs": [],
   "source": [
    "wb = load_workbook('report_2021.xlsx')\n",
    "sheet = wb['Report']\n",
    "# sum in columns B-G\n",
    "for i in excel_alphabet:\n",
    "    if i!='A':\n",
    "        sheet[f'{i}{max_row+1}'] = f'=SUM({i}{min_row+1}:{i}{max_row})'\n",
    "        sheet[f'{i}{max_row+1}'].style = 'Currency'\n",
    "\n",
    "# adding total label\n",
    "sheet[f'{excel_alphabet[0]}{max_row+1}'] = 'Total'\n",
    "wb.save('report_2021.xlsx')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "domestic-trail",
   "metadata": {},
   "source": [
    "## Formatting the report sheet"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "mighty-gates",
   "metadata": {},
   "outputs": [],
   "source": [
    "wb = load_workbook('report_2021.xlsx')\n",
    "sheet = wb['Report']\n",
    "\n",
    "sheet['A1'] = 'Sales Report'\n",
    "sheet['A2'] = '2021'\n",
    "sheet['A1'].font = Font('Arial', bold=True, size=20)\n",
    "sheet['A2'].font = Font('Arial', bold=True, size=10)\n",
    "\n",
    "wb.save('report_2021.xlsx')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "running-internship",
   "metadata": {},
   "source": [
    "# Automating the Report with a Python Function"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "hollywood-frame",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import openpyxl\n",
    "from openpyxl import load_workbook\n",
    "from openpyxl.styles import Font\n",
    "from openpyxl.chart import BarChart, Reference\n",
    "import string\n",
    "\n",
    "def automate_excel(file_name):\n",
    "    \"\"\"The file name should have the following structure: sales_month.xlsx\"\"\"\n",
    "    # read excel file\n",
    "    excel_file = pd.read_excel(file_name)\n",
    "    # make pivot table\n",
    "    report_table = excel_file.pivot_table(index='Gender', columns='Product line', values='Total', aggfunc='sum').round(0)\n",
    "    # splitting the month and extension from the file name\n",
    "    month_and_extension = file_name.split('_')[1]\n",
    "    # send the report table to excel file\n",
    "    report_table.to_excel(f'report_{month_and_extension}', sheet_name='Report', startrow=4)\n",
    "    # loading workbook and selecting sheet\n",
    "    wb = load_workbook(f'report_{month_and_extension}')\n",
    "    sheet = wb['Report']\n",
    "    # cell references (original spreadsheet)\n",
    "    min_column = wb.active.min_column\n",
    "    max_column = wb.active.max_column\n",
    "    min_row = wb.active.min_row\n",
    "    max_row = wb.active.max_row\n",
    "    # adding a chart\n",
    "    barchart = BarChart()\n",
    "    data = Reference(sheet, min_col=min_column+1, max_col=max_column, min_row=min_row, max_row=max_row) #including headers\n",
    "    categories = Reference(sheet, min_col=min_column, max_col=min_column, min_row=min_row+1, max_row=max_row) #not including headers\n",
    "    barchart.add_data(data, titles_from_data=True)\n",
    "    barchart.set_categories(categories)\n",
    "    sheet.add_chart(barchart, \"B12\") #location chart\n",
    "    barchart.title = 'Sales by Product line'\n",
    "    barchart.style = 2 #choose the chart style\n",
    "    # applying formulas\n",
    "    # first create alphabet list as references for cells\n",
    "    alphabet = list(string.ascii_uppercase)\n",
    "    excel_alphabet = alphabet[0:max_column] #note: Python lists start on 0 -> A=0, B=1, C=2. #note2 the [a:b] takes b-a elements\n",
    "    # sum in columns B-G\n",
    "    for i in excel_alphabet:\n",
    "        if i!='A':\n",
    "            sheet[f'{i}{max_row+1}'] = f'=SUM({i}{min_row+1}:{i}{max_row})'\n",
    "            sheet[f'{i}{max_row+1}'].style = 'Currency'\n",
    "    sheet[f'{excel_alphabet[0]}{max_row+1}'] = 'Total'\n",
    "    # getting month name\n",
    "    month_name = month_and_extension.split('.')[0]\n",
    "    # formatting the report\n",
    "    sheet['A1'] = 'Sales Report'\n",
    "    sheet['A2'] = month_name.title()\n",
    "    sheet['A1'].font = Font('Arial', bold=True, size=20)\n",
    "    sheet['A2'].font = Font('Arial', bold=True, size=10)\n",
    "    wb.save(f'report_{month_and_extension}')\n",
    "    return "
   ]
  },
  {
   "cell_type": "markdown",
   "id": "horizontal-radical",
   "metadata": {},
   "source": [
    "## Applying the function to a single Excel file"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "white-brass",
   "metadata": {},
   "outputs": [],
   "source": [
    "automate_excel('sales_2021.xlsx')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "knowing-toddler",
   "metadata": {},
   "source": [
    "## Applying the function to multiple Excel files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "noble-summit",
   "metadata": {},
   "outputs": [],
   "source": [
    "# option 1: apply the formula one by one to get 3 reports\n",
    "automate_excel('sales_january.xlsx')\n",
    "automate_excel('sales_february.xlsx')\n",
    "automate_excel('sales_march.xlsx')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "active-boulder",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# option 2: concatenate them first using pd.concat() and then apply the function only once.\n",
    "excel_file_1 = pd.read_excel('sales_january.xlsx')\n",
    "excel_file_2 = pd.read_excel('sales_february.xlsx')\n",
    "excel_file_3 = pd.read_excel('sales_march.xlsx')\n",
    "\n",
    "new_file = pd.concat([excel_file_1,\n",
    "                      excel_file_2,\n",
    "                      excel_file_3], ignore_index=True)\n",
    "new_file.to_excel('sales_2021.xlsx')\n",
    "automate_excel('sales_2021.xlsx')"
   ]
  }
 ],
 "metadata": {
  "hide_input": false,
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
